import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from scrapy_readbook_44.items import ScrapyReadbook44Item


class ReadSpider(CrawlSpider):
    """Crawl the paginated book listing ``/book/1188_<n>.html`` on dushu.com.

    Every listing page matched by ``rules`` is handed to :meth:`parse_item`,
    which emits one ``ScrapyReadbook44Item`` (fields ``name`` and ``src``)
    per cover image found on the page.
    """

    name = "read"
    allowed_domains = ["www.dushu.com"]
    start_urls = ["https://www.dushu.com/book/1188_1.html"]

    # Pagination links look like /book/1188_<page>.html; follow=True keeps
    # extracting links from each matched page as well.
    rules = (
        Rule(
            LinkExtractor(allow=r"/book/1188_\d+\.html"),
            callback="parse_item",
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Yield one item per cover image on a listing page.

        Title:      //div[@class="book-info"]//a/img/@alt
        Image URL:  //div[@class="book-info"]//a/img/@data-original
                    (falls back to @src when data-original is missing)

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            ScrapyReadbook44Item with ``name`` (the image ``alt`` text) and
            ``src`` (the absolute image URL, or ``None`` if the tag carries
            neither ``data-original`` nor ``src``).
        """
        for img in response.xpath('//div[@class="book-info"]//a/img'):
            name = img.xpath('./@alt').get()

            # Lazy-loaded images keep the real URL in data-original; an image
            # that is not lazy-loaded only has src, so fall back to it rather
            # than emitting an item without an image URL.
            src = (img.xpath('./@data-original').get()
                   or img.xpath('./@src').get())
            if src:
                # No-op for absolute URLs; resolves relative and
                # protocol-relative ones against the page URL.
                src = response.urljoin(src)

            yield ScrapyReadbook44Item(name=name, src=src)
